#!/usr/bin/env python
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import argparse
import pandas as pd

from download_utils import download_file, md5_checksum, extract

# Command-line interface: a CSV manifest of (url, md5) rows, a download folder,
# an optional extraction folder, and flags to skip individual pipeline stages.
parser = argparse.ArgumentParser(
    description="Download, verify and extract dataset files"
)
parser.add_argument(
    "csv", type=str, help="CSV file with urls and checksums to download."
)
parser.add_argument("dest", type=str, help="Download destination folder.")
parser.add_argument(
    "-e",
    type=str,
    default=None,
    help="Extraction destination folder. Defaults to download folder if not provided",
)
parser.add_argument(
    "--skip_download", action="store_true", help="Skip downloading the files"
)
parser.add_argument(
    "--skip_checksum", action="store_true", help="Skip checksum"
)
parser.add_argument(
    "--skip_extract", action="store_true", help="Skip extracting files"
)
args = parser.parse_args()
# Fall back to the download folder when no extraction folder was given.
args.e = args.e or args.dest


# Load the manifest; each row is expected to provide "url" and "md5" columns.
df = pd.read_csv(args.csv, sep=",")


# Stage 1: fetch every file listed in the manifest into the download folder.
if args.skip_download:
    print("Skipping file download")
else:
    for file_url in df.url:
        file_name = file_url.split("/")[-1]
        print("Downloading %s:" % file_name)
        download_file(url=file_url, dest_folder=args.dest, fname=file_name)


# Stage 2: verify each downloaded file against its expected MD5 hash from the
# manifest; abort on the first mismatch so corrupt files are never extracted.
if not args.skip_checksum:
    for _, row in df.iterrows():  # row index is unused
        url = row["url"]
        md5 = row["md5"]
        fname = url.split("/")[-1]
        fpath = os.path.join(args.dest, fname)
        print("Verifying %s: " % fname, end="")
        if not md5_checksum(fpath=fpath, target_hash=md5):
            raise ValueError(f"Checksum for {fname} failed!")
        print(f"Checksum correct for {fname}")
else:
    print("Skipping checksum")


# Stage 3: decompress each downloaded archive into the extraction folder
# (args.e, which defaults to the download folder).
if args.skip_extract:
    print("Skipping file extraction")
else:
    for archive_url in df.url:
        archive_name = archive_url.split("/")[-1]
        archive_path = os.path.join(args.dest, archive_name)
        print("Decompressing %s:" % archive_path)
        extract(fpath=archive_path, dest_folder=args.e)
